import HTMLParser
import urllib
import re
import string


# File the collected AS numbers are written to by the script below.
pathOut = "ixp_members.txt"
# Page that is downloaded and scanned for AS numbers.
urlString = "http://www.jpix.ad.jp/en/user/user.html"
# AS numbers (digits only, as strings) gathered by the parser, without duplicates.
urlText = []


class Parse21(HTMLParser.HTMLParser):
    """Collect the AS numbers found inside the <td> cells of an HTML page.

    A text chunk inside a <td> element is accepted when it consists of
    optional leading whitespace followed by "AS" and one or more digits.
    The digits (without the "AS" prefix) are appended to the module-level
    list ``urlText`` unless they are already in it.
    """

    # True while the parser is between a <td> start tag and a </td> end tag.
    check = False

    # Optional leading whitespace, the literal "AS", then the digits to keep.
    # "$" also matches just before a trailing newline, so the digits are
    # captured in a group to keep that newline out of the stored value.
    _as_number = re.compile(r"^\s*AS(\d+)$")

    def handle_starttag(self, tag, attrs):
        """Start accepting text when a <td> cell opens."""
        if tag == "td":
            self.check = True

    def handle_endtag(self, tag):
        """Stop accepting text when a <td> cell closes."""
        if tag == "td":
            self.check = False

    def handle_data(self, data):
        """Record the AS number in ``data`` if it is inside a <td> cell."""
        if not self.check:
            return
        found = self._as_number.match(data)
        if found is None:
            return
        # Keep only the digits that follow "AS".
        number = found.group(1)
        if number not in urlText:
            urlText.append(number)
        # check is reset by handle_endtag(), not here, because a single <td>
        # can contain two AS numbers.


# Download the member page and let the parser fill urlText with AS numbers.
lparser = Parse21()
response = urllib.urlopen(urlString)
try:
    lparser.feed(response.read())
finally:
    # Close the connection even if reading or parsing fails; try/finally is
    # used because this response object is not relied on as a context manager.
    response.close()

# Append one "21 <AS number>" line per collected entry; the file is opened in
# append mode, so existing content is kept, and "with" closes it on any exit.
with open(pathOut, "a") as fileOut:
    for item in urlText:
        fileOut.write("21 %s\n" % item)